### This R script produces Figure 1 in the main text of "Triangulating the relationship between education and immigration attitudes".
### Figure 1 consists of two parts: the upper subfigure shows the distribution of MZ twin differences in educational attainment
### the bottom subfigure shows the distribution of educational attainment by reform treatment
library(haven)
library(ggplot2)
library(dplyr)
library(psych)
library(gridExtra)


# Read in EduIA.dta produced by Data_EduIA.do and Analysis_EduIA.do 
data <- read_dta("H:/Dofiles/Edu_IA/EduIA.dta")

## Distribution of MZ twin differences in educational attainment
# Calculate twin differences in education and identify the lower level and higher level within a pair 
diffEdu_MZ <- data %>%
  select(c("education_year", "LopNrParID", "TwinNr", "Zyg", "sampleMZ")) %>%
  filter(sampleMZ == 1) %>% # Use the MZ twin sample as in Study 1
  na.omit() %>%
  group_by(LopNrParID) %>%
  summarize(diff_edu = abs(diff(education_year)), #absolute value of twin differences
            lower_edu = min(education_year), #lower level education in each pair, for the plot later
            twin_type = ifelse(education_year[1] > education_year[2], "high", "low"), #an indicator for twins with higher or lower educational attainment
            education_year = education_year, #essential variables for plotting later
            LopNrParID = LopNrParID,
            TwinNr = TwinNr,
            Zyg = Zyg,
            .groups = "drop")

# Create the plot with twin pairs ordered by the lower level of education
line_diffEdu_MZ <- diffEdu_MZ %>%
  ggplot(mapping = aes(x = factor(reorder(LopNrParID, lower_edu)), y = education_year, group = LopNrParID)) +
  geom_point(aes(shape = twin_type), size = 1, alpha = 0.5) +
  geom_line(color = "black", linewidth = 0.4, alpha = 0.5) +
  theme_bw() +
  scale_shape_manual(values = c(1, 1)) +
  scale_y_continuous(breaks = unique(data$education_year)) +
  theme(legend.position = "none",
        panel.grid.major.x = element_blank(),
        axis.line = element_line(),
        axis.text.x = element_blank()) +
  xlab("MZ twin pairs (N=922, mean=1.25, sd=1.59)") +
  ylab("Years of education") +
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 12)) +
  ggtitle("MZ twin differences in educational attainment")

line_diffEdu_MZ


## Distribution of educational attainment by education reform treatment
# Descriptive statistics for the control group
desc_control <- data %>%
  filter(sampleReform == 1) %>% # Use the Reform sample as in Study 2
  select(c("treatment", "education_year", "tertiary_edu", "im_avg", "im_irt", "female", "age_2009")) %>%
  na.omit() %>%
  subset(treatment == 0) %>% # subset the dataset with observations in the control group
  describe() %>%
  data.frame()

desc_control <- desc_control[c("n", "mean", "sd", "min", "max")]

print(desc_control)


# Descriptive statistics for the treatment group
desc_treatment <- data %>%
  filter(sampleReform == 1) %>%
  select(c("treatment", "education_year", "tertiary_edu", "im_avg", "im_irt", "female", "age_2009")) %>%
  na.omit() %>%
  subset(treatment == 1) %>% # subset the dataset with observations in the treatment group
  describe() %>%
  data.frame()

desc_treatment <- desc_treatment[c("n", "mean", "sd", "min", "max")]

print(desc_treatment)


# Distribution of treatment status
dis_reform <- data %>%
  select(c("treatment", "education_year", "sampleReform")) %>%
  na.omit() %>%
  filter(sampleReform == 1) %>%
  ggplot(aes(x = factor(treatment), y = education_year)) +
  theme_bw() +
  geom_point(position = position_jitter(width = 0.3, height = 0.3), alpha = 0.3) +
  geom_boxplot(alpha = 0.2) +
  scale_y_continuous(breaks = unique(data$education_year)) +
  ylab("Years of education") +
  xlab("Control (N=2201, mean=11.54, sd=2.89)    Treated (N=1554, mean=12.35, sd=2.41) ") +
  theme(axis.ticks.x = element_blank(),
        axis.text.x=element_blank()) +
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 12)) +
  ggtitle("Distribution of educational attainment by reform treatment")

dis_reform

Figure1 <- grid.arrange(line_diffEdu_MZ, dis_reform, ncol = 1)

ggsave(filename = "H:/Dofiles/Edu_IA/Results/Figure1.pdf", Figure1, width = 7, height = 9, dpi = 300, device = "pdf")

dev.off()
